Atmallen8's group workspace
Group: Pythia 1.3B_lepj8rtx
Name
16 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
activation
adlr_autoresume
adlr_autoresume_interval
apply_query_key_layer_scaling
attention_config
attention_dropout
attention_softmax_in_fp32
batch_size
bias_dropout_fusion
bias_gelu_fusion
char_level_ppl
checkpoint_activations
checkpoint_factor
checkpoint_in_cpu
checkpoint_num_layers
checkpoint_scale
checkpoint_validation_with_forward_pass
clip_grad
config_files.1-3B.yml
config_files.1.4B-0.5MtokBS.yml.attention-config
config_files.1.4B-0.5MtokBS.yml.attention-dropout
config_files.1.4B-0.5MtokBS.yml.bias-gelu-fusion
config_files.1.4B-0.5MtokBS.yml.checkpoint-activations
config_files.1.4B-0.5MtokBS.yml.checkpoint-factor
config_files.1.4B-0.5MtokBS.yml.checkpoint-num-layers
config_files.1.4B-0.5MtokBS.yml.data-impl
config_files.1.4B-0.5MtokBS.yml.deepspeed_slurm
config_files.1.4B-0.5MtokBS.yml.distributed-backend
config_files.1.4B-0.5MtokBS.yml.eval-interval
config_files.1.4B-0.5MtokBS.yml.eval-iters
config_files.1.4B-0.5MtokBS.yml.extra-save-iters
config_files.1.4B-0.5MtokBS.yml.fp16.enabled
config_files.1.4B-0.5MtokBS.yml.fp16.fp16
config_files.1.4B-0.5MtokBS.yml.fp16.hysteresis
config_files.1.4B-0.5MtokBS.yml.fp16.initial_scale_power
config_files.1.4B-0.5MtokBS.yml.fp16.loss_scale
config_files.1.4B-0.5MtokBS.yml.fp16.loss_scale_window
config_files.1.4B-0.5MtokBS.yml.fp16.min_loss_scale
config_files.1.4B-0.5MtokBS.yml.gas
config_files.1.4B-0.5MtokBS.yml.gpt-j-residual
config_files.1.4B-0.5MtokBS.yml.gradient_clipping
config_files.1.4B-0.5MtokBS.yml.hidden-dropout
config_files.1.4B-0.5MtokBS.yml.hidden-size
config_files.1.4B-0.5MtokBS.yml.init_method
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 53s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 53s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 52s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 53s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 59s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 50m 1s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
schoelkopf
v1
2d 12h 49m 54s
-
gelu
false
1000
false
["global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global","global"]
0
false
16
false
true
false
true
-
false
1
-
false
1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
1-16
of 16